Update random script
authorJeroen van der Heijden <jeroen@transceptor.technology>
Mon, 26 Mar 2018 15:10:15 +0000 (17:10 +0200)
committerJeroen van der Heijden <jeroen@transceptor.technology>
Mon, 26 Mar 2018 15:10:15 +0000 (17:10 +0200)
test/siridb-random-data.py

index 0a93bd69b695c58332c44bc42818f60f723039d0..552472641dc9df15332a07e89b5b1ffd8e7ea462 100755 (executable)
@@ -77,8 +77,8 @@ class Series:
     _interval_range = None
     _r = None
 
-    def __init__(self, allowed_kinds=(int, float, str)):
-        self.kind = self._r.choice(allowed_kinds)
+    def __init__(self, r, allowed_kinds=(int, float, str)):
+        self.kind = r.choice(allowed_kinds)
         self.lval = {
             str: '',
             int: 0,
@@ -86,18 +86,20 @@ class Series:
         }[self.kind]
         self.lts = self._timestamp
 
-        factor = 10**self._r.randint(int(self.kind == int), 6)
+        factor = 10**r.randint(int(self.kind == int), 6)
         self.random_range = (
-            int(self._r.random() * -factor),
-            int(self._r.random() * factor) + 1)
+            int(r.random() * -factor),
+            int(r.random() * factor) + 1)
         self.sign = 1
 
-        self.likely_equal = self._r.choice([0.01, 0.1, 0.2, 0.5, 0.99])
-        self.likely_change_sign = self._r.choice([0.0, 0.1, 0.25, 0.5, 0.9])
+        self.likely_equal = r.choice([0.01, 0.1, 0.2, 0.5, 0.99])
+        self.likely_change_sign = r.choice([0.0, 0.1, 0.25, 0.5, 0.9])
 
         if self.kind == float:
             # one in ten series will show float as int, example 1.0, 2.0 etc.
-            self.as_int = self._r.random() > 0.9
+            self.as_int = r.random() > 0.9
+            self.likely_inf = r.random() * 0.5 if r.random() > 0.9 else False
+            self.likely_nan = r.random() * 0.5 if r.random() > 0.9 else False
 
         self.name = self._gen_name()
         Series._series.append(self)
@@ -113,18 +115,30 @@ class Series:
                     self.lval = 0
 
             elif self.kind == float:
-                self.lval += \
-                    self.sign * \
-                    self._r.random() * \
-                    self.random_range[1]
-                if self.as_int:
-                    self.lval = round(self.lval, 0)
+                if self.likely_inf and self._r.random() < self.likely_inf:
+                    self.lval = self.sign * math.inf
+                elif self.likely_nan and self._r.random() < self.likely_nan:
+                    self.lval = math.nan
+                else:
+                    self.lval += \
+                        self.sign * \
+                        self._r.random() * \
+                        self.random_range[1]
+                    if self.as_int:
+                        self.lval = round(self.lval, 0)
 
         return self.lval
 
     @classmethod
-    def init(cls, args, ts_factor, r):
-        cls._r = r
+    def init(cls, args, ts_factor):
+        cls._r = random.Random()
+        cls._r.seed(
+            time.time() if args.seed_data is None else args.seed_data)
+
+        series_rand = random.Random()
+        series_rand.seed(
+            time.time() if args.seed_series is None else args.seed_series)
+
         cls._ts_factor = ts_factor
         cls._timestamp = int(time.mktime(datetime.datetime.strptime(
             args.start_date,
@@ -140,16 +154,20 @@ class Series:
         kinds = [translate[k] for k in args.kinds]
 
         for i in range(args.num_series):
-            Series(allowed_kinds=kinds)
+            Series(r=series_rand, allowed_kinds=kinds)
 
     def _gen_name(self):
-        name = '/n:{}/range:{},{}/eq:{}/cs:{}/opt:{}'.format(
+        name = '/n:{}/range:{},{}/eq:{}/cs:{}/opt:{}{}{}'.format(
             len(self._series),
             self.random_range[0],
             self.random_range[1],
             self.likely_equal,
             self.likely_change_sign,
-            'as_int' if getattr(self, 'as_int', False) else '')
+            'as_int' if getattr(self, 'as_int', False) else '',
+            '(inf:{})'.format(self.likely_inf)
+            if getattr(self, 'likely_inf', False) else '',
+            '(nan:{})'.format(self.likely_nan)
+            if getattr(self, 'likely_nan', False) else '')
 
         return name
 
@@ -206,20 +224,13 @@ async def get_ts_factor(siri):
     return 10**(['s', 'ms', 'us', 'ns'].index(res['data'][0]['value'])*3)
 
 
-def queue_data(args, ts_factor):
-    r = random.Random()
-    r.seed(time.time() if args.seed is None else args.seed)
-
-    Series.init(args, ts_factor, r)
-
+def queue_data(args):
     n = args.num_batches
     while n and stop is False:
         data = Series.get_data(args)
         yield data
         n -= 1
 
-    yield None
-
 
 async def siridb_insert(siri, data, task_counter):
     '''Insert data into SiriDB.'''
@@ -249,14 +260,15 @@ async def siridb_insert(siri, data, task_counter):
 
 async def dump_data(siri, args):
     task_counter = []
+
     try:
         await siri.connect()
+
         ts_factor = await get_ts_factor(siri)
-        q = queue_data(args, ts_factor)
-        while True:
-            data = next(q)
-            if data is None:
-                break
+        Series.init(args, ts_factor)
+
+        q = queue_data(args)
+        for data in q:
 
             task_counter.append(1)
             while len(task_counter) > args.max_parallel:
@@ -265,7 +277,7 @@ async def dump_data(siri, args):
             asyncio.ensure_future(siridb_insert(siri, data, task_counter))
 
             # sleep 0 so the async loop will run to pick-up tasks
-            await asyncio.sleep(0)
+            await asyncio.sleep(args.sleep)
 
     except Exception as e:
         logging.exception(e)
@@ -311,10 +323,15 @@ if __name__ == '__main__':
         help='siridb server(s)')
 
     parser.add_argument(
-        '-S',
-        '--seed',
+        '--seed-series',
         type=str,
-        help='Optional seed. '
+        help='Optional seed for generating series. '
+             'If no seed is given, the current timestamp will be used.')
+
+    parser.add_argument(
+        '--seed-data',
+        type=str,
+        help='Optional seed for generating data. '
              'If no seed is given, the current timestamp will be used.')
 
     parser.add_argument(
@@ -336,6 +353,12 @@ if __name__ == '__main__':
         help='Timestamp interval in seconds. '
              'When <= 0 a ramdom interval is used for each data point.')
 
+    parser.add_argument(
+        '--sleep',
+        type=float,
+        default=0.0,
+        help='Sleep between inserts in seconds. (default: 0.0 seconds)')
+
     parser.add_argument(
         '--ts-randomize',
         action='store_true',
@@ -446,7 +469,7 @@ Home-page: https://github.com/transceptor-technology/siridb-email-check
             for server in args.servers.split(',')])
 
     loop = asyncio.get_event_loop()
-    loop.run_until_complete(dump_data(siri, q, args))
+    loop.run_until_complete(dump_data(siri, args))
 
     total_time = time.time() - start_time
     logging.info(